Read in data:

# Data:
CC_dat <- read.csv("/Users/kellyloria/Documents/UNR/Randomness\ readings/CausalityClass/CM_data.csv") 

Optional: Create arbitrary variable for temporal autocorelation in plant size?

# Create lagged variable
CC_dat <- CC_dat %>%
  mutate(lag_plant_size = lag(plant_size, default = NA))

Check out parameter covariance:

# covariance correlations 
chart.Correlation(CC_dat, histogram=TRUE, pch=19)
## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

Look at linear regression in a “global model”

# saturated model:
hist(CC_dat$herbivory)

glb_mod <- lm(herbivory~scale(fertilizer) + scale(light) + scale(plant_size) + 
                scale(lag_plant_size) + scale(foraging), 
              data=CC_dat)
summary(glb_mod)
## 
## Call:
## lm(formula = herbivory ~ scale(fertilizer) + scale(light) + scale(plant_size) + 
##     scale(lag_plant_size) + scale(foraging), data = CC_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4817 -0.6490  0.0069  0.6568  3.4223 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -0.01646    0.03209  -0.513   0.6081    
## scale(fertilizer)      0.03942    0.04029   0.979   0.3280    
## scale(light)           0.26929    0.04574   5.887 5.38e-09 ***
## scale(plant_size)      0.64003    0.04273  14.980  < 2e-16 ***
## scale(lag_plant_size) -0.06740    0.03214  -2.097   0.0362 *  
## scale(foraging)        1.00974    0.03892  25.944  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.014 on 993 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.7105, Adjusted R-squared:  0.709 
## F-statistic: 487.3 on 5 and 993 DF,  p-value: < 2.2e-16
hist(residuals(glb_mod))

vif(glb_mod) # light looks like highest leverage 
##     scale(fertilizer)          scale(light)     scale(plant_size) 
##              1.575339              2.031419              1.771903 
## scale(lag_plant_size)       scale(foraging) 
##              1.001940              1.470318
hist(CC_dat$herbivory)

glb_mod1 <- lm(herbivory~scale(fertilizer) + scale(light) + scale(plant_size) + scale(foraging), 
              data=CC_dat)
summary(glb_mod)
## 
## Call:
## lm(formula = herbivory ~ scale(fertilizer) + scale(light) + scale(plant_size) + 
##     scale(lag_plant_size) + scale(foraging), data = CC_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4817 -0.6490  0.0069  0.6568  3.4223 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -0.01646    0.03209  -0.513   0.6081    
## scale(fertilizer)      0.03942    0.04029   0.979   0.3280    
## scale(light)           0.26929    0.04574   5.887 5.38e-09 ***
## scale(plant_size)      0.64003    0.04273  14.980  < 2e-16 ***
## scale(lag_plant_size) -0.06740    0.03214  -2.097   0.0362 *  
## scale(foraging)        1.00974    0.03892  25.944  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.014 on 993 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.7105, Adjusted R-squared:  0.709 
## F-statistic: 487.3 on 5 and 993 DF,  p-value: < 2.2e-16
hist(residuals(glb_mod1))

vif(glb_mod1) # light looks like highest leverage 
## scale(fertilizer)      scale(light) scale(plant_size)   scale(foraging) 
##          1.573452          2.031273          1.770796          1.468256
AIC(glb_mod, glb_mod1) # ? better with fake lag...
## Warning in AIC.default(glb_mod, glb_mod1): models are not all fitted to the
## same number of observations
##          df      AIC
## glb_mod   7 2871.477
## glb_mod1  6 2876.204

Look at Jonathan Lefcheck’s Piecewise structural equation examples: Link: https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12512 Link: https://jslefche.github.io/sem_book/

# Break down component regressions
plant_size_mod <- lm(plant_size ~  fertilizer +  light,
                     data = CC_dat)
summary(plant_size_mod)
## 
## Call:
## lm(formula = plant_size ~ fertilizer + light, data = CC_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2049 -0.6430 -0.0068  0.6828  3.3435 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0007139  0.0307122   0.023    0.981    
## fertilizer  0.4967008  0.0363250  13.674   <2e-16 ***
## light       0.5051542  0.0356793  14.158   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9704 on 997 degrees of freedom
## Multiple R-squared:  0.4349, Adjusted R-squared:  0.4338 
## F-statistic: 383.7 on 2 and 997 DF,  p-value: < 2.2e-16
herb_mod <- lm(herbivory ~ plant_size + light + foraging,
                data = CC_dat)
summary(herb_mod)
## 
## Call:
## lm(formula = herbivory ~ plant_size + light + foraging, data = CC_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5185 -0.6653  0.0023  0.6761  3.6133 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.004691   0.032127   0.146    0.884    
## plant_size  0.508836   0.030435  16.719  < 2e-16 ***
## light       0.283461   0.045091   6.286 4.85e-10 ***
## foraging    0.828113   0.032026  25.857  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.016 on 996 degrees of freedom
## Multiple R-squared:  0.709,  Adjusted R-squared:  0.7081 
## F-statistic: 808.9 on 3 and 996 DF,  p-value: < 2.2e-16
# Use the `psem` function to create the SEM
CC_psem_model <- psem(plant_size_mod, herb_mod)
## Warning: NAs detected in the dataset. Consider removing all rows with NAs to
## prevent fitting to different subsets of data
# Look at object
CC_psem_model 
## Structural Equations of x :
## lm: plant_size ~ fertilizer + light
## lm: herbivory ~ plant_size + light + foraging
## 
## Data:
##   fertilizer      light plant_size   foraging  herbivory lag_plant_size
## 1  0.5208573 -0.3872029 -0.7957658 -0.6570881 -1.6942758             NA
## 2  0.8155222 -0.1671887 -1.4537077  0.4605768 -0.7142346     -0.7957658
## 3  0.8835973 -1.4635119 -0.5343125 -2.0035209 -2.9674024     -1.4537077
## 4 -0.2264909 -1.4187651  0.1868491  0.4587782 -0.1949140     -0.5343125
## 5 -1.4803645 -0.1481271 -0.9388211 -0.1277455 -0.5076546      0.1868491
## 6  0.5107644  1.5497233 -0.3658381  0.9979259  1.2264027     -0.9388211
## ...with  994  more rows
## 
## [1] "class(psem)"
# Use `summary` function to get all information at once
summary(CC_psem_model)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================================================| 100%
## 
## Structural Equation Model of CC_psem_model 
## 
## Call:
##   plant_size ~ fertilizer + light
##   herbivory ~ plant_size + light + foraging
## 
##     AIC
##  5657.861
## 
## ---
## Tests of directed separation:
## 
##                 Independ.Claim Test.Type  DF Crit.Value P.Value 
##   herbivory ~ fertilizer + ...      coef 995     0.9813  0.3267 
##    plant_size ~ foraging + ...      coef 996     0.7844  0.4330 
## 
## --
## Global goodness-of-fit:
## 
## Chi-Squared = 1.585 with P-value = 0.453 and on 2 degrees of freedom
## Fisher's C = 3.911 with P-value = 0.418 and on 4 degrees of freedom
## 
## ---
## Coefficients:
## 
##     Response  Predictor Estimate Std.Error  DF Crit.Value P.Value Std.Estimate
##   plant_size fertilizer   0.4967    0.0363 997    13.6738       0       0.3747
##   plant_size      light   0.5052    0.0357 997    14.1582       0       0.3880
##    herbivory plant_size   0.5088    0.0304 996    16.7190       0       0.3490
##    herbivory      light   0.2835    0.0451 996     6.2864       0       0.1493
##    herbivory   foraging   0.8281    0.0320 996    25.8573       0       0.5355
##      
##   ***
##   ***
##   ***
##   ***
##   ***
## 
##   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05
## 
## ---
## Individual R-squared:
## 
##     Response method R.squared
##   plant_size   none      0.43
##    herbivory   none      0.71
# Plot SEM with standardized coefficients
plot(CC_psem_model)
# Get P-values
P1 <- summary(plant_size_mod)$coefficients[2, "Pr(>|t|)"]
P2 <- summary(herb_mod)$coefficients[2, "Pr(>|t|)"]

# Construct C-statistic
C <- -2 * (log(P1) + log(P2))
C
## [1] 429.1085
fisherC(CC_psem_model) 
##   Fisher.C df P.Value
## 1    3.911  4   0.418
# Compare to chi-squared distribution with 2*2 degrees of freedom
1 - pchisq(C, 4) # P < 0.05 == poor fit!
## [1] 0
# Can use `dsep` function to perform the tests automatically
dSep(CC_psem_model)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================================================| 100%
##                 Independ.Claim Test.Type  DF Crit.Value   P.Value 
## 1 herbivory ~ fertilizer + ...      coef 995  0.9812831 0.3266917 
## 2  plant_size ~ foraging + ...      coef 996  0.7843585 0.4330163

Fit Bayesian models

plant_size_b <- bf(plant_size ~  fertilizer +  light)
herb_b <- bf(herbivory ~ plant_size + light + foraging)

CC_fit_brms <- brm(plant_size_b +
                     herb_b + 
                     set_rescor(FALSE), 
                   data = CC_dat,
                   iter = 5000, warmup = 2500,
                   cores = 3, chains = 3)
## Warning: In subset.data.frame(x$frame$re, resp = x$resp) :
##  extra argument 'resp' will be disregarded

## Warning: In subset.data.frame(x$frame$re, resp = x$resp) :
##  extra argument 'resp' will be disregarded
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 15.0.0 (clang-1500.1.0.2.5)’
## using SDK: ‘MacOSX14.2.sdk’
## clang -arch x86_64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/x86_64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
## #include <cmath>
##          ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
## Warning: In subset.data.frame(x$frame$re, resp = x$resp) :
##  extra argument 'resp' will be disregarded

## Warning: In subset.data.frame(x$frame$re, resp = x$resp) :
##  extra argument 'resp' will be disregarded

Trace plots

plot(CC_fit_brms)

Extract posterior estimates for mean path coefficients

# Extract posterior summaries
posterior_summaries <- posterior_summary(CC_fit_brms)

#  Extract estimates and CIs for paths
estimates <- posterior_summaries[, "Estimate"]
ci_lower <- posterior_summaries[, "Q2.5"]
ci_upper <- posterior_summaries[, "Q97.5"]

# Create path labels
fertilizer_to_plant_size <- paste0("mean = ", round(estimates["b_plantsize_fertilizer"], 2), 
                                   ", CI = [", round(ci_lower["b_plantsize_fertilizer"], 2), 
                                   ", ", round(ci_upper["b_plantsize_fertilizer"], 2), "]")
light_to_plant_size <- paste0("mean = ", round(estimates["b_plantsize_light"], 2), 
                              ", CI = [", round(ci_lower["b_plantsize_light"], 2), 
                              ", ", round(ci_upper["b_plantsize_light"], 2), "]")
plant_size_to_herbivory <- paste0("mean = ", round(estimates["b_herbivory_plant_size"], 2), 
                                  ", CI = [", round(ci_lower["b_herbivory_plant_size"], 2), 
                                  ", ", round(ci_upper["b_herbivory_plant_size"], 2), "]")
light_to_herbivory <- paste0("mean = ", round(estimates["b_herbivory_light"], 2), 
                             ", CI = [", round(ci_lower["b_herbivory_light"], 2), 
                             ", ", round(ci_upper["b_herbivory_light"], 2), "]")
foraging_to_herbivory <- paste0("mean = ", round(estimates["b_herbivory_foraging"], 2), 
                                ", CI = [", round(ci_lower["b_herbivory_foraging"], 2), 
                                ", ", round(ci_upper["b_herbivory_foraging"], 2), "]")

Ploy using grViz

# Create the DAG plot
dag_plot <- grViz(paste0("
  digraph DAG {
    graph [layout = dot, rankdir = TB]

    node [shape = box]
    fertilizer -> plant_size [label = '", fertilizer_to_plant_size, "']
    light -> plant_size [label = '", light_to_plant_size, "']
    plant_size -> herbivory [label = '", plant_size_to_herbivory, "']
    light -> herbivory [label = '", light_to_herbivory, "']
    foraging -> herbivory [label = '", foraging_to_herbivory, "']

    plant_size [label = 'Plant Size']
    fertilizer [label = 'Fertilizer']
    light [label = 'Light']
    herbivory [label = 'Herbivory']
    foraging [label = 'Foraging']
  }
"))

dag_plot